{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_cscd = pd.read_csv('cscd.csv',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_wf_1 = pd.read_csv('wf_1.csv',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_wf_2 = pd.read_csv('wf_2.csv',index_col=0)\n",
    "df_wf_3 = pd.read_csv('wf_3.csv',index_col=0)\n",
    "df_vip= pd.read_csv('vip.csv',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_wf_2 = df_wf_2.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['paper_id', 'title', 'abstract', 'keyword', 'classification',\n",
      "       'author_name'],\n",
      "      dtype='object')\n",
      "Index(['paper_id', 'title', 'author_name', 'classification', 'keyword',\n",
      "       'abstract'],\n",
      "      dtype='object')\n",
      "Index(['文章ID', '中文标题', '中文作者', '分类号', '中文关键词', '中文摘要'], dtype='object')\n",
      "Index(['RemarkID', 'Title_C', 'Keyword_C', 'Remark_C', 'Class'], dtype='object')\n"
     ]
    }
   ],
   "source": [
    "print(df_cscd.columns)\n",
    "print(df_wf_1.columns)\n",
    "print(df_wf_3.columns)\n",
    "print(df_vip.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_wf_1.columns = ['paper_id','title','author_name','classification','keyword','abstract']\n",
    "df_wf_2.columns = ['paper_id','title','author_name','classification','keyword','abstract']\n",
    "df_wf_3.columns = ['paper_id','title','author_name','classification','keyword','abstract']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_wf_1 = pd.concat((df_wf_1,df_wf_2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the df_cscd titles that also occur in df_wf_1.\n",
    "# Membership is tested against a set: scanning a Python list once per title\n",
    "# costs len(df_cscd) * len(df_wf_1) comparisons.\n",
    "t_wf_1 = set(df_wf_1['title'])\n",
    "count = sum(t in t_wf_1 for t in df_cscd['title'])\n",
    "print(count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_cscd = df_cscd.drop_duplicates(['title','author_name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_wf_1 = df_wf_1.drop_duplicates(['title','author_name'])\n",
    "df_wf_3 = df_wf_3.drop_duplicates(['title','author_name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.concat((df_cscd,df_wf_1,df_wf_3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.drop_duplicates(['title','author_name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>paper_id</th>\n",
       "      <th>title</th>\n",
       "      <th>abstract</th>\n",
       "      <th>keyword</th>\n",
       "      <th>classification</th>\n",
       "      <th>author_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6059470</td>\n",
       "      <td>刺老苞根皮含药血清对原代成骨细胞Wnt /β-catenin 信号通路的影响</td>\n",
       "      <td>该文研究了不同浓度刺老苞根皮含药血清对原代成骨细胞Wnt /β-catenin信号通路中β-...</td>\n",
       "      <td>刺老苞根皮;;原代成骨细胞;;Wnt /β-catenin;;信号通路</td>\n",
       "      <td>R285</td>\n",
       "      <td>依香叫</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6060347</td>\n",
       "      <td>UPLC 测定不同产地半夏药材及其炮制品中的草酸钙</td>\n",
       "      <td>目的采用UPLC法测定半夏药材及其炮制品中草酸钙的含量。方法色谱柱为Acquity UPLC...</td>\n",
       "      <td>半夏;;超高效液相色谱法;;法半夏;;姜半夏;;清半夏;;草酸钙;;草酸;;测定;;质量控制</td>\n",
       "      <td>R917</td>\n",
       "      <td>何丹</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6060337</td>\n",
       "      <td>野山杏果肉总有机酸对高脂血症小鼠血脂及肝脏的影响</td>\n",
       "      <td>目的研究野山杏果肉总有机酸(TOAWA)对高脂血症小鼠血脂的影响及其保肝作用。方法ig给予小...</td>\n",
       "      <td>野山杏;;总有机酸;;高血脂症;;保护肝脏;;总胆固醇;;低密度脂蛋白- 胆固醇;;高密度脂...</td>\n",
       "      <td>R96</td>\n",
       "      <td>阿依姑丽·艾合麦提</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6060346</td>\n",
       "      <td>抗非小细胞肺癌新型药物YL725的含量测定及稳定性研究</td>\n",
       "      <td>目的采用RP - HPLC法测定新型抗非小细胞肺癌小分子药物YL725的含量,并考察其稳定性...</td>\n",
       "      <td>非小细胞肺癌;;YL725;;反相高效液相色谱法;;含量测定;;稳定性;;影响因素试验</td>\n",
       "      <td>R917</td>\n",
       "      <td>宋春丽</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6060728</td>\n",
       "      <td>魔芋低聚糖对结肠炎大鼠肠道菌群的影响</td>\n",
       "      <td>目的:研究魔芋低聚糖(KOS)对溃疡性结肠炎大鼠肠道菌群结构的影响,并探讨其改善大鼠结肠炎症...</td>\n",
       "      <td>魔芋低聚糖;;溃疡性结肠炎;;肠道菌群;;短链脂肪酸</td>\n",
       "      <td>R151</td>\n",
       "      <td>刘瑞雪</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37122</th>\n",
       "      <td>zyyxyxb201805020</td>\n",
       "      <td>妇科恶性肿瘤术后化疗患者癌症复发恐惧及其影响因素调查研究</td>\n",
       "      <td>目的 探讨妇科恶性肿瘤术后化疗患者癌症复发恐惧(fear of cancer recurre...</td>\n",
       "      <td>妇科恶性肿瘤%复发恐惧%焦虑%抑郁%社会支持</td>\n",
       "      <td>R737.3</td>\n",
       "      <td>廖玲玲%王志毅%胡静</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37123</th>\n",
       "      <td>zyyxyxb201805021</td>\n",
       "      <td>工作压力对医学规培生主观幸福感的影响:心理资本的中介作用</td>\n",
       "      <td>目的 探讨工作压力对医学规培生主观幸福感的影响及心理资本的中介作用.方法 采用总体幸福感量表...</td>\n",
       "      <td>主观幸福感%心理资本%工作压力</td>\n",
       "      <td>R192.3</td>\n",
       "      <td>刘志军%孙文彦%李现群%余韵聪%王宪宏%胡志川%龙艺</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37124</th>\n",
       "      <td>zyyxyxb201805023</td>\n",
       "      <td>光遗传学技术在中枢神经系统疾病中的应用进展</td>\n",
       "      <td>人类大脑结构和功能极其复杂,而中枢神经系统疾病也极大影响着人类的生存质量.光遗传学技术的出现...</td>\n",
       "      <td>光遗传学%中枢神经系统疾病%术后认知功能障碍</td>\n",
       "      <td>R742</td>\n",
       "      <td>林夏妃%朱昭琼</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37125</th>\n",
       "      <td>zyyxyxb201805024</td>\n",
       "      <td>造影剂肾病发病机制的研究进展</td>\n",
       "      <td>随着现代造影技术的广泛开展,造影剂肾病的发生日益增多,其与感染性急性肾损伤、药物性急性肾损伤...</td>\n",
       "      <td>造影剂肾病%氧化应激%自噬</td>\n",
       "      <td>R692.6</td>\n",
       "      <td>罗婷%陈燕玲</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37126</th>\n",
       "      <td>zzgydxxb201806016</td>\n",
       "      <td>基于脑电图的三分类前臂运动方向解析</td>\n",
       "      <td>针对基于非侵入式脑机接口技术的右臂运动方向的判别问题,采用自主运动实验范式,将右臂自主运动脑...</td>\n",
       "      <td>EEG%右臂%运动方向%WPD%CSP</td>\n",
       "      <td>R318.04%TP391.4</td>\n",
       "      <td>逯鹏%张利朋%胡玉霞%陈书立%李新建</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1103173 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                paper_id                                   title  \\\n",
       "0                6059470  刺老苞根皮含药血清对原代成骨细胞Wnt /β-catenin 信号通路的影响   \n",
       "1                6060347               UPLC 测定不同产地半夏药材及其炮制品中的草酸钙   \n",
       "2                6060337                野山杏果肉总有机酸对高脂血症小鼠血脂及肝脏的影响   \n",
       "3                6060346             抗非小细胞肺癌新型药物YL725的含量测定及稳定性研究   \n",
       "4                6060728                      魔芋低聚糖对结肠炎大鼠肠道菌群的影响   \n",
       "...                  ...                                     ...   \n",
       "37122   zyyxyxb201805020            妇科恶性肿瘤术后化疗患者癌症复发恐惧及其影响因素调查研究   \n",
       "37123   zyyxyxb201805021            工作压力对医学规培生主观幸福感的影响:心理资本的中介作用   \n",
       "37124   zyyxyxb201805023                   光遗传学技术在中枢神经系统疾病中的应用进展   \n",
       "37125   zyyxyxb201805024                          造影剂肾病发病机制的研究进展   \n",
       "37126  zzgydxxb201806016                       基于脑电图的三分类前臂运动方向解析   \n",
       "\n",
       "                                                abstract  \\\n",
       "0      该文研究了不同浓度刺老苞根皮含药血清对原代成骨细胞Wnt /β-catenin信号通路中β-...   \n",
       "1      目的采用UPLC法测定半夏药材及其炮制品中草酸钙的含量。方法色谱柱为Acquity UPLC...   \n",
       "2      目的研究野山杏果肉总有机酸(TOAWA)对高脂血症小鼠血脂的影响及其保肝作用。方法ig给予小...   \n",
       "3      目的采用RP - HPLC法测定新型抗非小细胞肺癌小分子药物YL725的含量,并考察其稳定性...   \n",
       "4      目的:研究魔芋低聚糖(KOS)对溃疡性结肠炎大鼠肠道菌群结构的影响,并探讨其改善大鼠结肠炎症...   \n",
       "...                                                  ...   \n",
       "37122  目的 探讨妇科恶性肿瘤术后化疗患者癌症复发恐惧(fear of cancer recurre...   \n",
       "37123  目的 探讨工作压力对医学规培生主观幸福感的影响及心理资本的中介作用.方法 采用总体幸福感量表...   \n",
       "37124  人类大脑结构和功能极其复杂,而中枢神经系统疾病也极大影响着人类的生存质量.光遗传学技术的出现...   \n",
       "37125  随着现代造影技术的广泛开展,造影剂肾病的发生日益增多,其与感染性急性肾损伤、药物性急性肾损伤...   \n",
       "37126  针对基于非侵入式脑机接口技术的右臂运动方向的判别问题,采用自主运动实验范式,将右臂自主运动脑...   \n",
       "\n",
       "                                                 keyword   classification  \\\n",
       "0                    刺老苞根皮;;原代成骨细胞;;Wnt /β-catenin;;信号通路             R285   \n",
       "1         半夏;;超高效液相色谱法;;法半夏;;姜半夏;;清半夏;;草酸钙;;草酸;;测定;;质量控制             R917   \n",
       "2      野山杏;;总有机酸;;高血脂症;;保护肝脏;;总胆固醇;;低密度脂蛋白- 胆固醇;;高密度脂...              R96   \n",
       "3            非小细胞肺癌;;YL725;;反相高效液相色谱法;;含量测定;;稳定性;;影响因素试验             R917   \n",
       "4                             魔芋低聚糖;;溃疡性结肠炎;;肠道菌群;;短链脂肪酸             R151   \n",
       "...                                                  ...              ...   \n",
       "37122                             妇科恶性肿瘤%复发恐惧%焦虑%抑郁%社会支持           R737.3   \n",
       "37123                                    主观幸福感%心理资本%工作压力           R192.3   \n",
       "37124                             光遗传学%中枢神经系统疾病%术后认知功能障碍             R742   \n",
       "37125                                      造影剂肾病%氧化应激%自噬           R692.6   \n",
       "37126                                EEG%右臂%运动方向%WPD%CSP  R318.04%TP391.4   \n",
       "\n",
       "                      author_name  \n",
       "0                             依香叫  \n",
       "1                              何丹  \n",
       "2                       阿依姑丽·艾合麦提  \n",
       "3                             宋春丽  \n",
       "4                             刘瑞雪  \n",
       "...                           ...  \n",
       "37122                  廖玲玲%王志毅%胡静  \n",
       "37123  刘志军%孙文彦%李现群%余韵聪%王宪宏%胡志川%龙艺  \n",
       "37124                     林夏妃%朱昭琼  \n",
       "37125                      罗婷%陈燕玲  \n",
       "37126          逯鹏%张利朋%胡玉霞%陈书立%李新建  \n",
       "\n",
       "[1103173 rows x 6 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv('medical_paper_merge_cscd_wf.csv')"
   ]
<<<<<<< HEAD
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/leo/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3072: DtypeWarning: Columns (0,1) have mixed types.Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('medical_paper_merge_cscd_wf.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"/data/leo/Projects/UER-py/corpora/medical_paper_merge_cscd_wf.txt\",\"w\") as f:\n",
    "    for i in range(len(df)):\n",
    "        abs = df.iloc[i][\"abstract\"]\n",
    "        if isinstance(abs,str):\n",
    "            f.write(abs)\n",
    "            f.write(\"\\n\")"
   ]
=======
>>>>>>> 13d246c963a6359e8198769fc841309a3200151d
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "目的 回顾性收集HIV/TB死亡病例,分析人类免疫缺陷病毒(HIV)感染合并结核病(TB)死亡病例的基本特点.方法 选取2015年1月1日～2016年12月31日成都市公共卫生临床医疗中心HIV/TB死亡病例42例作为研究对象,分析HIV/TB死亡病例的特征.结果 42例死亡患者,30.95%合并血行播散型肺结核;痰抗酸杆菌涂片阳性率47.61%,痰分枝杆菌培养阳性率66.61%,耐药率42.86%;合并肺外结核者24例(57.14%);CD4+T淋巴细胞计数均值(64.33±50.11)个/uL,其中耐药组CD4+T淋巴细胞计数(52.55±35.98个/uL)低于非耐药组(60.6±23.74个/uL),差异无统计学意义(P＞0.05).结论 HIV/TB死亡病例易合并肺外结核、CD4+T细胞多≤100个/uL等特征,其危险因素是抗结核及抗病毒治疗滞后、CD4+T淋巴细胞计数≤100个/uL,尽早开始抗结核及抗病毒治疗可以有效降低HIV/TB死亡率.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['回顾性收集HIV/TB死亡病例,分析人类免疫缺陷病毒(HIV)感染合并结核病(TB)死亡病例的基本特点.',\n",
       " '选取2015年1月1日～2016年12月31日成都市公共卫生临床医疗中心HIV/TB死亡病例42例作为研究对象,分析HIV/TB死亡病例的特征.',\n",
       " '42例死亡患者,30.95%合并血行播散型肺结核;痰抗酸杆菌涂片阳性率47.',\n",
       " '61%,痰分枝杆菌培养阳性率66.61%,耐药率42.',\n",
       " '86%;合并肺外结核者24例(57.14%);CD4+T淋巴细胞计数均值(64.',\n",
       " '33±50.11)个/uL,其中耐药组CD4+T淋巴细胞计数(52.',\n",
       " '55±35.98个/uL)低于非耐药组(60.',\n",
       " '6±23.74个/uL),差异无统计学意义(P＞0.',\n",
       " '05).',\n",
       " 'HIV/TB死亡病例易合并肺外结核、CD4+T细胞多≤100个/uL等特征,其危险因素是抗结核及抗病毒治疗滞后、CD4+T淋巴细胞计数≤100个/uL,尽早开始抗结核及抗病毒治疗可以有效降低HIV/TB死亡率.']"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = df.iloc[904174]['abstract']\n",
    "print(a)\n",
    "Seg_Sents_Cn(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Seg_Sents_Cn import Seg_Sents_Cn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "目的 回顾性收集HIV/TB死亡病例,分析人类免疫缺陷病毒(HIV)感染合并结核病(TB)死亡病例的基本特点.方法 选取2015年1月1日～2016年12月31日成都市公共卫生临床医疗中心HIV/TB死亡病例42例作为研究对象,分析HIV/TB死亡病例的特征.结果 42例死亡患者,30.95%合并血行播散型肺结核;痰抗酸杆菌涂片阳性率47.61%,痰分枝杆菌培养阳性率66.61%,耐药率42.86%;合并肺外结核者24例(57.14%);CD4+T淋巴细胞计数均值(64.33±50.11)个/uL,其中耐药组CD4+T淋巴细胞计数(52.55±35.98个/uL)低于非耐药组(60.6±23.74个/uL),差异无统计学意义(P＞0.05).结论 HIV/TB死亡病例易合并肺外结核、CD4+T细胞多≤100个/uL等特征,其危险因素是抗结核及抗病毒治疗滞后、CD4+T淋巴细胞计数≤100个/uL,尽早开始抗结核及抗病毒治疗可以有效降低HIV/TB死亡率.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['回顾性收集HIV/TB死亡病例,分析人类免疫缺陷病毒(HIV)感染合并结核病(TB)死亡病例的基本特点.',\n",
       " '选取2015年1月1日～2016年12月31日成都市公共卫生临床医疗中心HIV/TB死亡病例42例作为研究对象,分析HIV/TB死亡病例的特征.',\n",
       " '42例死亡患者,30.95%合并血行播散型肺结核;痰抗酸杆菌涂片阳性率47.',\n",
       " '61%,痰分枝杆菌培养阳性率66.61%,耐药率42.',\n",
       " '86%;合并肺外结核者24例(57.14%);CD4+T淋巴细胞计数均值(64.',\n",
       " '33±50.11)个/uL,其中耐药组CD4+T淋巴细胞计数(52.',\n",
       " '55±35.98个/uL)低于非耐药组(60.',\n",
       " '6±23.74个/uL),差异无统计学意义(P＞0.',\n",
       " '05).',\n",
       " 'HIV/TB死亡病例易合并肺外结核、CD4+T细胞多≤100个/uL等特征,其危险因素是抗结核及抗病毒治疗滞后、CD4+T淋巴细胞计数≤100个/uL,尽早开始抗结核及抗病毒治疗可以有效降低HIV/TB死亡率.']"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = df.iloc[904174]['abstract']\n",
    "print(a)\n",
    "Seg_Sents_Cn(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = \"30.95%合并血行播散型肺结核;痰抗酸杆菌涂片阳性率47.61%\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = [54, 129, 143, 170, 187, 197, 215, 237, 243, 271, 277, 294, 299, 320, 324, 432]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every 432 from a. The filtered list is assigned back in place:\n",
    "# calling a.remove() while iterating over a shifts the remaining items and\n",
    "# makes the loop skip elements.\n",
    "a[:] = [index for index in a if index != 432]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "54\n",
      "129\n",
      "143\n",
      "170\n",
      "187\n",
      "197\n",
      "215\n",
      "237\n",
      "243\n",
      "271\n",
      "277\n",
      "294\n",
      "299\n",
      "320\n",
      "324\n"
     ]
    }
   ],
   "source": [
    "for index in a:\n",
    "    print(index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Seg_Sents_Cn import Seg_Sents_Cn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['研究姜黄素对四氯化碳诱导的大鼠急性肝损伤的保护作用及其机制。',\n",
       " '60只健康♂SD大鼠随机分成6组,即正常对照组,肝损伤模型组,阳性药物对照水飞蓟素组(100 mg·kg~(-1)),姜黄素低剂量组(25 mg·kg~(-1)),姜黄素中剂量组(50 mg·kg~(-1))和姜黄素高剂量组(100 mg·kg~(-1))。',\n",
       " '隔天灌胃给药,共30 d;末次给药1 h后,腹腔注射20 mg·kg~(-1) CCl_4玉米油溶液(2 mL·kg~(-1))造模,禁食不禁水,12 h后乌拉坦麻醉。',\n",
       " '取下腔静脉血和肝脏后,分别检测大鼠血清中丙氨酸氨基转移酶(AST)及天门冬氨酸氨基转移酶(ASL)的活性,大鼠肝脏组织中血红素加氧酶Ⅰ(HO-1)及静脉血中HbCO的水平,在体外测定姜黄素清除DPPH自由基及ABTS自由基的能力。',\n",
       " '与正常对照组相比,模型组血清中ALT、AST活性显著升高,肝组织中HO-1活性及静脉血中HbCO的含量显著降低,组织病理检查显示肝组织损伤明显增加。',\n",
       " '与模型组相比,姜黄素各剂量组可不同程度的降低血清AST及ASL的活性,增加肝脏组织中HO-1的活性及HbCO的水平,组织病理检查显示肝损伤有不同程度减轻。',\n",
       " '并且姜黄素具有清除DPPH自由基及ABTS自由基的能力。',\n",
       " '姜黄素对CCl_4诱导的大鼠急性肝损伤具有一定保护作用,其机制可能与其自身抗氧化能力及诱导HO-1及HbCO有关。']"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Seg_Sents_Cn(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 处理词表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the MedicalKG terms that are not already in the MeSH word list.\n",
    "with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/MedicalKG.txt','r',encoding='utf-8') as f:\n",
    "    # The term is the first whitespace-separated token of each line; blank\n",
    "    # lines are skipped because ''.split()[0] would raise IndexError.\n",
    "    word_list = [line.split()[0] for line in f if line.strip()]\n",
    "\n",
    "with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/MeSH_word_list.txt','r',encoding='utf-8') as f:\n",
    "    mesh_word_list = [line.strip() for line in f]\n",
    "\n",
    "# Set lookup instead of scanning the whole MeSH list once per term.\n",
    "mesh_words = set(mesh_word_list)\n",
    "\n",
    "with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/MedicalKG_word_list.txt','w',encoding='utf-8') as f:\n",
    "    # dict.fromkeys de-duplicates while keeping first-seen order, so the\n",
    "    # output file is identical on every run (iterating a set of str is not).\n",
    "    for w in dict.fromkeys(word_list):\n",
    "        if w not in mesh_words:\n",
    "            f.write(w+\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/THUOCL_medical.txt','r') as f:\n",
    "#     word_list = [line.split()[0] for line in f.readlines()]\n",
    "    \n",
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/MeSH_word_list.txt','r') as f:\n",
    "#     mesh_word_list = [line.strip() for line in f.readlines()]\n",
    "\n",
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/MedicalKG_word_list.txt','r') as f:\n",
    "#     medkg_word_list = [line.strip() for line in f.readlines()]\n",
    "\n",
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/medical_terms/Thu_word_list.txt','w') as f:\n",
    "#     for w in set(word_list):\n",
    "#         if w not in mesh_word_list:\n",
    "#             if w not in medkg_word_list:\n",
    "#                 f.write(w+\"\\n\")\n",
    "\n",
    "\n",
    "\n",
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/word_list/mesh_word_list_all_cn.txt','r') as f:\n",
    "#     mesh_word_list_all = [line.strip() for line in f.readlines()]\n",
    "\n",
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/word_list/mesh_crawed_first_batch.txt','r') as f:\n",
    "#     mesh_word_list_crawed = [line.strip() for line in f.readlines()]\n",
    "\n",
    "# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/word_list/mesh_word_list_buchong.txt','w') as f:\n",
    "#     for w in set(mesh_word_list_all):\n",
    "#         if w not in mesh_word_list_crawed:\n",
    "#                 f.write(w+\"\\n\")\n",
    "\n",
    "import pandas as pd\n",
    "df = pd.read_excel('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/entities_sougou_ding.xlsx')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "sougou_words =  df.term.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the Sogou terms that are missing from the combined word list.\n",
    "with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/word_list/word_list_all.txt','r',encoding='utf-8') as f:\n",
    "    mword_list_all = [line.strip() for line in f]\n",
    "\n",
    "# Set lookup instead of scanning the whole list once per term.\n",
    "known_words = set(mword_list_all)\n",
    "\n",
    "with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/word_list/sougou_word_list.txt','w',encoding='utf-8') as f:\n",
    "    # dict.fromkeys de-duplicates while keeping first-seen order, so the\n",
    "    # output file is identical on every run (iterating a set of str is not).\n",
    "    for w in dict.fromkeys(sougou_words):\n",
    "        if w not in known_words:\n",
    "            f.write(w+\"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 处理sougou词库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/entities_sougou_ding.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'人体组织', '医学仪器', '手术', '未分类', '检查科目', '疾病', '症状', '穴位', '药物'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "import pandas as pd\n",
    "df = pd.read_excel('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/entities_sougou_ding.xlsx')\n",
    "\n",
    "label_dict = {'人体组织':'身体', '医学仪器':'器材', '手术':'治疗方法', '疾病':'疾病', '穴位':'身体', '药物':'药物'}\n",
    "import json\n",
    "\n",
    "with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank/clean_data.json','r') as f:\n",
    "    term_dict=json.load(f)\n",
    "\n",
    "count = 0\n",
    "for key,value in label_dict.items():\n",
    "    label_words = df.loc[df['category'] == key]['term'].tolist()\n",
    "    for word in label_words:\n",
    "        if word not in term_dict[value]:\n",
    "            count += 1\n",
    "            \n",
    "    \n",
    "\n",
    "    \n",
    "\n",
    "\n",
    "for\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['艾灸',\n",
       " '艾森克人格评定',\n",
       " '癌症高热治疗',\n",
       " '按摩手法治疗',\n",
       " '鞍区病损切除术',\n",
       " '暗适应检查',\n",
       " '安装牙托',\n",
       " '奥狄氏括约肌扩张术',\n",
       " '奥狄氏括约肌切开术',\n",
       " '拔甲术',\n",
       " '拔牙术',\n",
       " '白内障超声乳化抽吸术',\n",
       " '白内障囊内摘除术',\n",
       " '白内障囊外摘除术',\n",
       " '白内障切割吸出术',\n",
       " '白内障针吸术',\n",
       " '白细胞治疗性去除',\n",
       " '半侧骨盆切除术',\n",
       " '半侧颜面萎缩矫正术',\n",
       " '板层角膜成形术伴自体移植物',\n",
       " '板层角膜移植术',\n",
       " '半规管开窗术',\n",
       " '瘢痕松解术',\n",
       " '半喉切除术',\n",
       " '半月神经节切除术',\n",
       " '包皮瘢痕切除术',\n",
       " '包皮病损切除术',\n",
       " '包皮环切术',\n",
       " '包皮切开术',\n",
       " '背部超声检查',\n",
       " '背部肌肉病损切除术',\n",
       " '背阔肌移植术',\n",
       " '杯状耳矫正术',\n",
       " '贲门病损切除术',\n",
       " '贲门部分切除伴食管胃吻合术',\n",
       " '贲门周围血管结扎术',\n",
       " '鼻部分切除术',\n",
       " '臂部皮瓣鼻重建术',\n",
       " '鼻重建术',\n",
       " '鼻唇病损切除术',\n",
       " '鼻唇沟成形术',\n",
       " '鼻唇瘘管切除术',\n",
       " '鼻唇皮瓣鼻成形术',\n",
       " '臂丛神经上中下干缝合术',\n",
       " '臂丛神经松解术',\n",
       " '臂丛神经探查术',\n",
       " '鼻道冲洗',\n",
       " '鼻电凝止血',\n",
       " '鼻窦成形术',\n",
       " '鼻窦抽吸和灌洗',\n",
       " '鼻窦骨折切开复位术',\n",
       " '鼻窦核磁共振检查',\n",
       " '鼻窦瘘修补术',\n",
       " '鼻窦切开异物取出术',\n",
       " '鼻窦透照',\n",
       " '鼻窦造口术',\n",
       " '鼻窦造影',\n",
       " '鼻功能性检查',\n",
       " '鼻骨骨折闭合复位术',\n",
       " '鼻骨骨折切开复位术',\n",
       " '闭合性颅神经活组织检查',\n",
       " '闭合性下颌支骨成形术',\n",
       " '闭合性周围神经活组织检查',\n",
       " '鼻活组织检查',\n",
       " '鼻甲部分切除术',\n",
       " '鼻甲成形术',\n",
       " '鼻甲电烧术',\n",
       " '鼻甲骨折术',\n",
       " '鼻甲激光烧灼术',\n",
       " '鼻甲冷冻切除术',\n",
       " '鼻甲射频消融术',\n",
       " '鼻甲微波烧灼术',\n",
       " '臂假肢安装',\n",
       " '臂假肢装置置入术',\n",
       " '鼻尖成形术',\n",
       " '闭孔疝修补术',\n",
       " '闭孔神经缝合术',\n",
       " '闭孔神经切断术',\n",
       " '闭孔神经吻合术',\n",
       " '鼻泪管插管术',\n",
       " '鼻泪管激光探通插管术',\n",
       " '鼻泪管扩张模置入术',\n",
       " '鼻泪管探通术',\n",
       " '鼻冷冻止血',\n",
       " '鼻裂伤缝合术',\n",
       " '鼻内病损激光烧灼术',\n",
       " '鼻内病损切除术',\n",
       " '鼻内镜下经鼻腺样体切除术',\n",
       " '鼻内镜下腺样体消融术',\n",
       " '鼻内窥镜检查',\n",
       " '鼻内窥镜下鼻窦活组织检查',\n",
       " '鼻内窥镜下鼻窦检查',\n",
       " '鼻内窥镜下鼻甲部分切除术',\n",
       " '鼻内窥镜下鼻甲成形术',\n",
       " '鼻内窥镜下鼻甲切除术',\n",
       " '鼻内窥镜下鼻泪管吻合术',\n",
       " '鼻内窥镜下鼻内病损切除术',\n",
       " '鼻内窥镜下鼻腔粘连松解术',\n",
       " '鼻内窥镜下鼻微波烧灼止血术',\n",
       " '鼻内窥镜下鼻中隔成形术',\n",
       " '鼻内窥镜下鼻中隔粘膜划痕术',\n",
       " '鼻内窥镜下鼻中隔粘膜下切除术',\n",
       " '鼻内窥镜下电凝止血术',\n",
       " '鼻内窥镜下蝶窦病损切除术',\n",
       " '鼻内窥镜下蝶窦开窗术',\n",
       " '鼻内窥镜下多个鼻窦开窗术',\n",
       " '鼻内窥镜下额窦病损切除术',\n",
       " '鼻内窥镜下额窦开窗术',\n",
       " '鼻内窥镜下钩突切除术',\n",
       " '鼻内窥镜下全组鼻窦开窗术',\n",
       " '鼻内窥镜下筛窦病损切除术',\n",
       " '鼻内窥镜下筛窦开窗术',\n",
       " '鼻内窥镜下上颌窦病损切除术',\n",
       " '鼻内窥镜下上颌窦根治术',\n",
       " '鼻内窥镜下上颌窦开窗术',\n",
       " '鼻内窥镜下上颌骨部分切除术',\n",
       " '鼻粘膜切除止血术',\n",
       " '鼻皮肤病损切除术',\n",
       " '鼻前庭病损切除术',\n",
       " '鼻腔扩张术',\n",
       " '鼻腔内异物去除',\n",
       " '鼻腔粘连松解术',\n",
       " '鼻切开探查术',\n",
       " '鼻切开异物取出术',\n",
       " '鼻切开引流术',\n",
       " '鼻清创术',\n",
       " '鼻软骨切开术',\n",
       " '鼻饲',\n",
       " '鼻死骨切除术',\n",
       " '鼻填塞物去除',\n",
       " '鼻填塞物置换',\n",
       " '鼻胃管冲洗',\n",
       " '鼻胃管置换',\n",
       " '壁细胞迷走神经切断术',\n",
       " '鼻小柱成形术',\n",
       " '鼻咽闭锁矫正术',\n",
       " '鼻咽病损切除术',\n",
       " '鼻咽活组织检查',\n",
       " '鼻咽扩张术',\n",
       " '鼻咽瘘管切除术',\n",
       " '鼻咽造影',\n",
       " '鼻翼成形术',\n",
       " '鼻植皮术',\n",
       " '鼻植入物取出术',\n",
       " '鼻中隔病损激光烧灼术',\n",
       " '鼻中隔成形术',\n",
       " '鼻中隔穿孔修补术',\n",
       " '鼻中隔粘膜下切除术',\n",
       " '鼻中隔软骨移植术',\n",
       " '辨色检查',\n",
       " '变态反应免疫治疗',\n",
       " '扁桃残体切除术',\n",
       " '扁桃体伴腺样体切除术',\n",
       " '扁桃体病损切除术',\n",
       " '扁桃体病损射频消融术',\n",
       " '扁桃体活组织检查',\n",
       " '扁桃体激光切除术',\n",
       " '扁桃体脓肿引流术',\n",
       " '扁桃体切除术',\n",
       " '扁桃体切除术后止血',\n",
       " '扁桃体周围脓肿引流术',\n",
       " '变性手术',\n",
       " '表面角膜镜片术',\n",
       " '表浅放射治疗',\n",
       " '髌骨病损切除术',\n",
       " '髌骨部分切除术',\n",
       " '髌骨重建术',\n",
       " '髌骨钢板内固定术',\n",
       " '髌骨钢针内固定术',\n",
       " '髌骨骨折闭合复位术',\n",
       " '髌骨骨折切开复位螺钉内固定术',\n",
       " '髌骨骨折切开复位术',\n",
       " '髌骨开窗引流术',\n",
       " '髌骨开放性骨折清创术',\n",
       " '髌骨螺钉内固定术',\n",
       " '髌骨内固定物取出术',\n",
       " '髌骨牵引',\n",
       " '髌骨切除术',\n",
       " '髌骨死骨去除术',\n",
       " '髌骨髓内针内固定术',\n",
       " '髌骨脱位闭合复位术',\n",
       " '髌骨外固定架去除术',\n",
       " '髌骨外固定术',\n",
       " '髌骨稳定术',\n",
       " '髌骨植骨术',\n",
       " '髌韧带重建术',\n",
       " '髌韧带缝合术',\n",
       " '病毒性脑炎疫苗预防性接种',\n",
       " '丙烯酸水泥骨空隙填充',\n",
       " '并指矫正术',\n",
       " '并趾矫正术',\n",
       " '玻璃体穿刺抽液术',\n",
       " '玻璃体腔残留晶体皮质取出术',\n",
       " '玻璃体腔硅油取出术',\n",
       " '玻璃体腔内替代物注射术',\n",
       " '玻璃体腔气液交换术',\n",
       " '玻璃体腔探查术',\n",
       " '玻璃体腔脱位晶状体取出术',\n",
       " '玻璃体腔药物注射术',\n",
       " '玻璃体腔异物取出术',\n",
       " '玻璃体诊断性抽吸',\n",
       " '玻璃体注气术',\n",
       " '玻璃体自体血清注入术',\n",
       " '剥膜引产',\n",
       " '玻尿酸注射',\n",
       " '部分肺静脉异位引流矫正术',\n",
       " '部分臀位牵引术',\n",
       " '部分咽切除术',\n",
       " '哺乳期吸奶',\n",
       " '残端皮肤修整术',\n",
       " '残角子宫切除术',\n",
       " '残留卵巢切除术',\n",
       " '残留皮质切除术',\n",
       " '残留输卵管卵巢切除术',\n",
       " '残留输卵管切除术',\n",
       " '残胃切除术',\n",
       " '残余胆囊管切除术',\n",
       " '残余胆囊切除术',\n",
       " '残余喉切除术',\n",
       " '残余甲状腺大部切除术',\n",
       " '残余甲状腺切除术',\n",
       " '残余肾切除术',\n",
       " '残余子宫颈切除术',\n",
       " '苍白球切开术',\n",
       " '苍白球射频毁损术',\n",
       " '藏毛囊肿切除术',\n",
       " '藏毛囊肿切开术',\n",
       " '侧腱束切断术',\n",
       " '侧脑室病损切除术',\n",
       " '插入导尿管',\n",
       " '产后宫腔手法探查',\n",
       " '产后子宫颈探查术',\n",
       " '产科会阴血肿去除术',\n",
       " '产科内翻子宫手法复位',\n",
       " '产科内翻子宫手术矫正术',\n",
       " '产科外阴血肿去除术',\n",
       " '产科阴道血肿去除术',\n",
       " '产科子宫填塞',\n",
       " '产钳旋转胎头',\n",
       " '产前训练',\n",
       " '肠插管减压',\n",
       " '肠淋巴干小静脉吻合术',\n",
       " '肠内高营养',\n",
       " '肠套迭复位',\n",
       " '肠外置段的切除术',\n",
       " '常温人工冬眠治疗',\n",
       " '肠系膜病损切除术',\n",
       " '肠系膜动脉结扎术',\n",
       " '肠系膜固定术',\n",
       " '肠系膜活组织检查',\n",
       " '肠系膜静脉缝合术',\n",
       " '肠系膜静脉腔静脉分流术',\n",
       " '肠系膜静脉下腔静脉分流术',\n",
       " '肠系膜静脉造影',\n",
       " '肠系膜淋巴管瘤囊肿切除术',\n",
       " '肠系膜淋巴结根治性切除术',\n",
       " '肠系膜淋巴结切除术',\n",
       " '肠系膜上动脉血栓切除术',\n",
       " '肠系膜上动脉造影',\n",
       " '肠系膜上动脉支架置入术',\n",
       " '肠系膜上静脉血栓切除术',\n",
       " '肠系膜修补术',\n",
       " '肠造口冲洗',\n",
       " '肠造口扩张',\n",
       " '肠造口脱垂手法复位',\n",
       " '肠造瘘口指检',\n",
       " '肠粘连松解术',\n",
       " '超声热疗法',\n",
       " '超声心动图',\n",
       " '超声引导下电吸人流',\n",
       " '超引导下胆管穿刺引流术',\n",
       " '超引导下胆囊穿刺引流术',\n",
       " '超引导下肝病损射频消融术',\n",
       " '超引导下肝穿刺活组织检查',\n",
       " '超引导下经皮经肝胆管引流术',\n",
       " '超引导下盆腔穿刺术',\n",
       " '超引导下前列腺穿刺',\n",
       " '超引导下肾病损射频消融术',\n",
       " '超引导下肾穿刺活组织检查',\n",
       " '超引导下胸腔穿刺术',\n",
       " '超引导下羊水减量',\n",
       " '超引导下痔结扎术',\n",
       " '成份血细胞输入',\n",
       " '齿槽骨折闭合复位内固定术',\n",
       " '尺动脉结扎术',\n",
       " '尺动脉吻合术',\n",
       " '尺骨病损切除术',\n",
       " '耻骨病损切除术',\n",
       " '尺骨部分切除术',\n",
       " '耻骨部分切除术',\n",
       " '尺骨成形术',\n",
       " '尺骨钢板内固定术',\n",
       " '尺骨钢针内固定术',\n",
       " '尺骨骨骺分离闭合复位术',\n",
       " '尺骨骨折闭合复位钢针内固定术',\n",
       " '尺骨骨折闭合复位螺钉内固定术',\n",
       " '尺骨骨折闭合复位术',\n",
       " '尺骨骨折切开复位钢板内固定术',\n",
       " '尺骨骨折切开复位钢针内固定术',\n",
       " '尺骨骨折切开复位螺钉内固定术',\n",
       " '尺骨骨折切开复位术',\n",
       " '耻骨后尿道悬吊术',\n",
       " '耻骨后膀胱前前列腺切除术',\n",
       " '耻骨后前列腺切除术',\n",
       " '耻骨后探查术',\n",
       " '尺骨开放性骨折清创术',\n",
       " '尺骨螺钉内固定术',\n",
       " '尺骨内固定物取出术',\n",
       " '尺骨切除术',\n",
       " '耻骨切开助产',\n",
       " '尺骨取骨术',\n",
       " '尺骨人工骨植骨术',\n",
       " '耻骨上经膀胱前列腺切除术',\n",
       " '耻骨上膀胱造口导尿管插入术',\n",
       " '耻骨上悬吊尿道膀胱固定术',\n",
       " '耻骨疏韧带悬吊术',\n",
       " '尺骨死骨去除术',\n",
       " '尺骨髓内针内固定术',\n",
       " '尺骨头切除术',\n",
       " '尺骨外固定架去除术',\n",
       " '尺骨外固定术',\n",
       " '尺骨楔形截骨术',\n",
       " '尺骨延长术',\n",
       " '尺骨折骨术',\n",
       " '耻骨直肠肌部分切断术',\n",
       " '尺骨植骨术',\n",
       " '齿钳拔牙',\n",
       " '尺桡关节脱位切开复位术',\n",
       " '尺神经缝合术',\n",
       " '尺神经松解术',\n",
       " '尺神经探查术',\n",
       " '尺神经吻合术',\n",
       " '尺神经延迟修补术',\n",
       " '尺神经移位术',\n",
       " '尺神经移值术',\n",
       " '重建鼻修正术',\n",
       " '重建眉修整术',\n",
       " '重组骨形态形成蛋白置入术',\n",
       " '除毛术',\n",
       " '处女膜缝合术',\n",
       " '处女膜切除术',\n",
       " '处女膜切开术',\n",
       " '穿透性角膜移植术',\n",
       " '穿透性自体角膜移植术',\n",
       " '垂体病损切除术',\n",
       " '垂体活组织检查',\n",
       " '垂体切除术',\n",
       " '垂体窝探查术',\n",
       " '垂直喉切除术',\n",
       " '锤状趾矫正术',\n",
       " '槌状指修补术',\n",
       " '唇瘢痕松解术',\n",
       " '唇病损广泛切除术',\n",
       " '唇病损激光烧灼术',\n",
       " '唇病损切除术',\n",
       " '唇成形术',\n",
       " '唇活组织检查',\n",
       " '唇颊沟延伸术',\n",
       " '唇裂伤缝合术',\n",
       " '唇裂修补术',\n",
       " '唇裂再修补术',\n",
       " '唇瘘管修补术',\n",
       " '唇皮瓣移植术',\n",
       " '唇全厚植皮术',\n",
       " '唇系带切断术',\n",
       " '唇腺自体移植术',\n",
       " '唇中厚植皮术',\n",
       " '催产素引产',\n",
       " '催眠疗法',\n",
       " '大肠病损破坏术',\n",
       " '大肠病损切除术',\n",
       " '大肠部分切除用于间置术',\n",
       " '大肠多节段切除术',\n",
       " '大肠局部灌注',\n",
       " '大肠瘘修补术',\n",
       " '大肠内异物去除',\n",
       " '大肠扭转复位术',\n",
       " '大肠切开探查术',\n",
       " '大肠切开异物取出术',\n",
       " '大肠外置段的切除术',\n",
       " '大肠外置术',\n",
       " '大肠吻合修正术',\n",
       " '大肠造口导管置换',\n",
       " '大脑半球切除术',\n",
       " '大脑病损切除术',\n",
       " '大脑皮层粘连松解术',\n",
       " '大脑清创术',\n",
       " '大腿截断术',\n",
       " '大腿再植术',\n",
       " '大网膜包肝术',\n",
       " '大网膜包肾术',\n",
       " '大网膜病损切除术',\n",
       " '大网膜部分切除术',\n",
       " '大网膜动脉结扎术',\n",
       " '大网膜还纳术',\n",
       " '大网膜内移植术',\n",
       " '大网膜切除术',\n",
       " '大网膜修补术',\n",
       " '大血管转位矫正术',\n",
       " '大阴唇病损切除术',\n",
       " '大隐静脉剥脱术',\n",
       " '大隐静脉插管术',\n",
       " '大隐静脉高位结扎剥脱术',\n",
       " '大隐静脉肱动脉搭桥术',\n",
       " '大隐静脉股动脉搭桥术',\n",
       " '大隐静脉结扎术',\n",
       " '大隐静脉主干激光闭合术',\n",
       " '带蒂肠片肝管成形术',\n",
       " '带蒂皮瓣断蒂术',\n",
       " '带蒂皮瓣去脂术',\n",
       " '带蒂皮瓣徙前术',\n",
       " '带蒂皮瓣修整术',\n",
       " '带蒂皮瓣延迟术',\n",
       " '带蒂皮瓣移植术',\n",
       " '单侧肺移植术',\n",
       " '单侧腹股沟疝无张力修补术',\n",
       " '单侧腹股沟疝修补术',\n",
       " '单侧腹股沟斜疝疝囊高位结扎术',\n",
       " '单侧腹股沟斜疝无张力修补术',\n",
       " '单侧腹股沟斜疝修补术',\n",
       " '单侧腹股沟直疝无张力修补术',\n",
       " '单侧腹股沟直疝修补术',\n",
       " '单侧腹疝无张力修补术',\n",
       " '单侧睾丸部分切除术',\n",
       " '单侧睾丸附睾切除术',\n",
       " '单侧睾丸切除术',\n",
       " '单侧股疝无张力修补术',\n",
       " '单侧股疝修补术',\n",
       " '单侧甲状腺部分切除术',\n",
       " '单侧甲状腺次全切除术',\n",
       " '单侧甲状腺切除术',\n",
       " '单侧颈淋巴结根治性清扫术',\n",
       " '单侧卵巢切除术',\n",
       " '单侧内收肌髂腰肌切断术',\n",
       " '单侧皮下乳房切除伴假体置入术',\n",
       " '单侧皮下乳房切除术',\n",
       " '单侧乳房改良根治术',\n",
       " '单侧乳房根治性切除术',\n",
       " '单侧乳房假体置入术',\n",
       " '单侧乳房扩大根治性切除术',\n",
       " '单侧乳房切除术',\n",
       " '单侧乳房缩小成形术',\n",
       " '单侧乳房注射隆胸术',\n",
       " '单侧乳内动脉冠状动脉搭桥术',\n",
       " '单侧肾切除术',\n",
       " '单侧肾上腺大部分切除术',\n",
       " '单侧肾上腺切除术',\n",
       " '单侧肾上腺区探查术',\n",
       " '单侧输卵管挤压术',\n",
       " '单侧输卵管结扎术',\n",
       " '单侧输卵管卵巢切除术',\n",
       " '单侧输卵管切除术',\n",
       " '单侧外阴切除术',\n",
       " '单侧隐睾切除术',\n",
       " '胆肠吻合口切开取石术',\n",
       " '单纯淋巴结切除术',\n",
       " '胆道内假体置换术',\n",
       " '胆道切开探查术',\n",
       " '单道人工耳蜗置换术',\n",
       " '单道人工耳蜗置入术',\n",
       " '胆道吻合修正术',\n",
       " '胆道引流管置换',\n",
       " '胆道支架置换',\n",
       " '单根导管冠状动脉造影',\n",
       " '胆管病损切除术',\n",
       " '胆管成形术',\n",
       " '胆管冲洗',\n",
       " '胆管肝管空肠吻合术',\n",
       " '胆管根治切除术',\n",
       " '胆管假体装置去除术',\n",
       " '胆管空肠吻合口闭合术',\n",
       " '胆管空肠吻合术',\n",
       " '胆管瘘口修补术',\n",
       " '胆管切开取石术',\n",
       " '胆管十二指肠吻合术',\n",
       " '胆管探查术',\n",
       " '胆管体外碎石',\n",
       " '胆管胃吻合术',\n",
       " '胆管吻合术',\n",
       " '胆管修补术',\n",
       " '胆管引流管去除',\n",
       " '胆管引流术',\n",
       " '胆管造影',\n",
       " '胆管支架去除',\n",
       " '胆管支架置入术',\n",
       " '单科会诊',\n",
       " '单克隆抗体治疗',\n",
       " '单列神经刺激脉冲发生器置换术',\n",
       " '单列神经刺激脉冲发生器置入术',\n",
       " '胆囊部分切除术',\n",
       " '胆囊动脉结扎术',\n",
       " '胆囊肝管吻合术',\n",
       " '胆囊根治性切除术',\n",
       " '胆囊空肠瘘切除术',\n",
       " '胆囊空肠吻合术',\n",
       " '胆囊扩大切除术',\n",
       " '胆囊瘘修补术',\n",
       " '胆囊破裂修补术',\n",
       " '胆囊切除术',\n",
       " '胆囊切开引流术',\n",
       " '胆囊十二指肠吻合术',\n",
       " '胆囊体外碎石',\n",
       " '胆囊胃瘘修补术',\n",
       " '胆囊胃吻合术',\n",
       " '胆囊引流术',\n",
       " '胆囊造口闭合术',\n",
       " '胆囊造口冲洗',\n",
       " '胆囊造口导管去除',\n",
       " '胆囊造口术',\n",
       " '胆囊造影',\n",
       " '单腔永久起搏器置换术',\n",
       " '单腔永久起搏器置入术',\n",
       " '单心房矫治术',\n",
       " '单一血管的操作',\n",
       " '单源光子放射治疗',\n",
       " '胆总管病损切除术',\n",
       " '胆总管肠吻合口拆除术',\n",
       " '胆总管空肠吻合术',\n",
       " '胆总管瘘修补术',\n",
       " '胆总管切除术',\n",
       " '胆总管切开取石术',\n",
       " '胆总管切开异物取出术',\n",
       " '胆总管切开引流术',\n",
       " '胆总管球囊扩张术',\n",
       " '胆总管十二指肠后壁吻合术',\n",
       " '胆总管十二指肠吻合术',\n",
       " '胆总管损伤修补术',\n",
       " '胆总管探查术',\n",
       " '胆总管胃空肠吻合术',\n",
       " '胆总管胃吻合术',\n",
       " '胆总管修补术',\n",
       " '胆总管造影',\n",
       " '刀放射治疗',\n",
       " '导联矫正术',\n",
       " '导尿管冲洗',\n",
       " '导尿管去除',\n",
       " '导水管粘连松解术',\n",
       " '蹬车运动试验',\n",
       " '等份联体双胎分离术',\n",
       " '镫骨板钻孔术',\n",
       " '镫骨部分切除伴脂肪移植术',\n",
       " '镫骨部分切除术',\n",
       " '镫骨切除伴砧骨置换术',\n",
       " '镫骨切除术',\n",
       " '镫骨切除术伴砧骨置换的修正术',\n",
       " '镫骨切除术的修正术',\n",
       " '镫骨松动术',\n",
       " '镫骨再撼动术',\n",
       " '骶部脓肿切开引流术',\n",
       " '骶丛神经缝合术',\n",
       " '骶丛神经探查术',\n",
       " '骶骨病损切除术',\n",
       " '骶骨部分切除术',\n",
       " '骶骨切除术',\n",
       " '骶骨韧带切断术',\n",
       " '骶髂关节病损切除术',\n",
       " '骶髂关节固定术',\n",
       " '骶前病损切除术',\n",
       " '骶前交感神经切除术',\n",
       " '骶韧带缩短术',\n",
       " '骶韧带悬吊术',\n",
       " '第三脑室病损切除术',\n",
       " '骶神经刺激电极取出术',\n",
       " '骶神经电刺激器置入术',\n",
       " '骶神经松解术',\n",
       " '第四脑室病损切除术',\n",
       " '骶尾部病损切除术',\n",
       " '低位产钳伴会阴切开术',\n",
       " '低位产钳术',\n",
       " '低位子宫下段剖宫产',\n",
       " '骶椎病损切除术',\n",
       " '骶椎植骨术',\n",
       " '电除颤',\n",
       " '电磁助听器置入术',\n",
       " '碘放射性同位素近距离治疗',\n",
       " '碘放射性同位素远距离治疗',\n",
       " '碘放射性同位素注射治疗',\n",
       " '电离子透入疗法',\n",
       " '电吸刮宫术',\n",
       " '电吸人流术',\n",
       " '电针经络氧疗法',\n",
       " '电针脉冲疗法',\n",
       " '电子肠镜下结肠息肉切除术',\n",
       " '电子结肠镜检查',\n",
       " '电子膀胱刺激器置入术',\n",
       " '电子输尿管刺激器去除术',\n",
       " '电子输尿管刺激器置换术',\n",
       " '电子输尿管刺激器置入术',\n",
       " '电子远距离放射治疗',\n",
       " '蝶窦切除术',\n",
       " '蝶腭神经节切除术',\n",
       " '蝶骨电极插入术',\n",
       " '顶叶病损切除术',\n",
       " '动静脉瘘夹闭术',\n",
       " '动静脉瘘结扎术',\n",
       " '动静脉瘘切断术',\n",
       " '动脉插管术',\n",
       " '动脉穿刺术',\n",
       " '动脉导管结扎术',\n",
       " '动脉导管伞堵术',\n",
       " '动脉导管未闭切断缝合术',\n",
       " '动脉合成补片修补术',\n",
       " '动脉瘤包裹术',\n",
       " '动脉瘤夹闭术',\n",
       " '动脉瘤破裂修补术',\n",
       " '动脉瘤切除伴吻合术',\n",
       " '动脉瘤栓结术',\n",
       " '动脉内膜剥脱术',\n",
       " '动脉修补术',\n",
       " '动脉硬化检测',\n",
       " '动脉植入心脏血管重建术',\n",
       " '动脉注射化疗药物',\n",
       " '动脉组织补片修补术',\n",
       " '断鼻再接术',\n",
       " '断耳再植术',\n",
       " '对比剂精囊造影',\n",
       " '多道人工耳蜗置换术',\n",
       " '多道人工耳蜗置入术',\n",
       " '多导睡眠脑电图',\n",
       " '多个眶壁减压术',\n",
       " '多根导管冠状动脉造影',\n",
       " '多克隆抗体治疗',\n",
       " '多块椎骨融合',\n",
       " '多胎妊娠减胎术',\n",
       " '多条眼外肌后徙术',\n",
       " '多条眼外肌缩短术',\n",
       " '多源光子放射治疗',\n",
       " '多指截指术',\n",
       " '额鼻管扩张',\n",
       " '额部皮瓣鼻重建术',\n",
       " '腭垂病损切除术',\n",
       " '腭垂部分切除术',\n",
       " '腭垂活组织检查',\n",
       " '腭垂裂修补术',\n",
       " '腭垂切除术',\n",
       " '腭垂切开术',\n",
       " '额窦病损切除术',\n",
       " '额骨重建术',\n",
       " '额肌瓣矫正术',\n",
       " '腭裂伤缝合术',\n",
       " '腭裂修补术',\n",
       " '腭裂修补术伴腭垂修补术',\n",
       " '腭裂修补术后修正术',\n",
       " '腭瘘管修补术',\n",
       " '腭切开探查术',\n",
       " '腭咽成形术',\n",
       " '额叶病损切除术',\n",
       " '额叶切除术',\n",
       " '耳垂畸形矫正术',\n",
       " '耳垂脓肿切开引流术',\n",
       " '耳垂造孔术',\n",
       " '耳大神经吻合术',\n",
       " '耳大神经移植术',\n",
       " '耳耵聍去除',\n",
       " '耳后病损切除术',\n",
       " '耳后瘘管修补术',\n",
       " '耳后皮肤扩张器植入术',\n",
       " '耳甲腔成形术',\n",
       " '二尖瓣瓣环成形术',\n",
       " '二尖瓣瓣周漏修补术',\n",
       " '二尖瓣闭式扩张术',\n",
       " '二尖瓣机械瓣膜置换术',\n",
       " '二尖瓣生物瓣膜置换术',\n",
       " '二尖瓣探查术',\n",
       " '耳镜检查',\n",
       " '耳廓成形术',\n",
       " '耳廓重建术',\n",
       " '耳廓切除术',\n",
       " '耳廓清创术',\n",
       " '耳廓缺损修补术',\n",
       " '耳廓支架取出术',\n",
       " '耳廓支架置入术',\n",
       " '耳廓植皮术',\n",
       " '耳内异物去除',\n",
       " '耳前病损切除术',\n",
       " '耳前窦道切除术',\n",
       " '耳前瘘管切除术',\n",
       " '耳前脓肿切开引流术',\n",
       " '耳前皮肤扩张器置入术',\n",
       " '耳蜗电极修正术',\n",
       " '耳蜗电图',\n",
       " '耳游离皮瓣移植术',\n",
       " '耳语听力试验',\n",
       " '法特氏壶腹切除术',\n",
       " '发音重建术',\n",
       " '法乐氏四联症根治术',\n",
       " '犯罪责任评估',\n",
       " '房间隔缺损闭式伞堵修补术',\n",
       " '房间隔缺损修补术',\n",
       " '房间隔缺损组织补片修补术',\n",
       " '房角分离术',\n",
       " '放疗后眼窝凹陷填充术',\n",
       " '放射敷贴器取出术',\n",
       " '放射敷贴器置入术',\n",
       " '放射疗法',\n",
       " '放射性粒子置入',\n",
       " '放射性铯远距离治疗',\n",
       " '放射性同位素远距离治疗',\n",
       " '放射性同位素注射',\n",
       " '肺病损切除术',\n",
       " '肺病损氩氦刀冷冻术',\n",
       " '肺彩色超声检查',\n",
       " '腓肠肌腱膜松解术',\n",
       " '腓肠神经移植术',\n",
       " '肺超声检查',\n",
       " '肺穿刺活组织检查',\n",
       " '肺穿刺术',\n",
       " '非穿透性小梁切除术',\n",
       " '肺大泡缝扎术',\n",
       " '肺动静脉瘘栓塞术',\n",
       " '肺动脉瓣闭式扩张术',\n",
       " '肺动脉瓣机械瓣膜置换术',\n",
       " '肺动脉瓣生物瓣膜置换术',\n",
       " '肺动脉瓣探查术',\n",
       " '肺动脉部分切除伴吻合术',\n",
       " '肺动脉环缩术',\n",
       " '肺动脉结扎术',\n",
       " '肺动脉内膜剥脱术',\n",
       " '肺动脉嵌入压监测',\n",
       " '肺动脉球囊扩张成形术',\n",
       " '腓动脉球囊扩张成形术',\n",
       " '肺动脉融合术',\n",
       " '肺动脉上腔静脉分流术',\n",
       " '肺动脉栓塞术',\n",
       " '肺动脉探查术',\n",
       " '肺动脉修补术',\n",
       " '肺动脉血栓切除术',\n",
       " '肺动脉压监测',\n",
       " '肺动脉造影',\n",
       " '肺功能测定',\n",
       " '腓骨病损切除术',\n",
       " '腓骨部分切除术',\n",
       " '腓骨长短肌腱延长术',\n",
       " '腓骨钢板内固定术',\n",
       " '腓骨钢针内固定术',\n",
       " '腓骨骨折闭合复位钢板内固定术',\n",
       " '腓骨骨折闭合复位钢针内固定术',\n",
       " '腓骨骨折闭合复位螺钉内固定术',\n",
       " '腓骨骨折闭合复位术',\n",
       " '腓骨骨折切开复位钢板内固定术',\n",
       " '腓骨骨折切开复位钢针内固定术',\n",
       " '腓骨骨折切开复位螺钉内固定术',\n",
       " '腓骨骨折切开复位术',\n",
       " '腓骨活组织检查',\n",
       " '腓骨开放性骨折清创术',\n",
       " '腓骨螺钉内固定术',\n",
       " '腓骨内固定物取出术',\n",
       " '腓骨切除术',\n",
       " '腓骨取骨术',\n",
       " '腓骨死骨去除术',\n",
       " '腓骨髓内针内固定术',\n",
       " '腓骨外固定架去除术',\n",
       " '腓骨外固定术',\n",
       " '腓骨小头切除术',\n",
       " '腓骨楔形截骨术',\n",
       " '腓骨延长术',\n",
       " '腓骨折骨术',\n",
       " '腓骨植骨术',\n",
       " '肺灌洗术',\n",
       " '肺减容术',\n",
       " '肺节段切除术',\n",
       " '肺静脉成形术',\n",
       " '肺静脉造影',\n",
       " '非可膨胀性阴茎假体置入术',\n",
       " '肺裂伤修补术',\n",
       " '肺门淋巴结根治性切除术',\n",
       " '肺门淋巴结切除术',\n",
       " '肺内异物取出术',\n",
       " '肺皮质剥除术',\n",
       " '肺切开引流术',\n",
       " '腓神经缝合术',\n",
       " '腓神经松解术',\n",
       " '腓神经吻合术',\n",
       " '肺同位素扫描',\n",
       " '肺楔形切除术',\n",
       " '肺修补术',\n",
       " '肺叶伴邻近肺叶节段切除术',\n",
       " '肺叶部分切除术',\n",
       " '肺叶切除术',\n",
       " '肺叶切除术伴淋巴结清扫术',\n",
       " '肺粘连松解术',\n",
       " '腓总神经松解术',\n",
       " '腓总神经探查术',\n",
       " '腓总神经吻合术',\n",
       " '粪便嵌顿去除',\n",
       " '分娩流产后电吸刮宫术',\n",
       " '分娩流产后刮宫术',\n",
       " '分娩时人工破膜',\n",
       " '腹白线疝无张力修补术',\n",
       " '腹壁白线疝修补术',\n",
       " '腹壁病损切除术',\n",
       " '腹壁补片修补术',\n",
       " '腹壁窦道扩创术',\n",
       " '腹壁窦道切开引流术',\n",
       " '腹壁窦道造影',\n",
       " '腹壁缝线去除',\n",
       " '腹壁活组织检查',\n",
       " '腹壁裂伤缝合术',\n",
       " '腹壁淋巴管瘤囊肿切除术',\n",
       " '腹壁脓肿切开引流术',\n",
       " '腹壁切开引流术',\n",
       " '腹壁切口裂开缝合术',\n",
       " '腹壁切口疝无张力修补术',\n",
       " '腹壁切口疝修补术',\n",
       " '腹壁疝修补术',\n",
       " '腹壁伤口扩创术',\n",
       " '腹壁伤口清创术',\n",
       " '腹壁血管结扎术',\n",
       " '腹壁血肿清除术',\n",
       " '腹壁异物取出术',\n",
       " '腹壁造口术',\n",
       " '腹部彩色超声检查',\n",
       " '腹部超声检查',\n",
       " '腹部核磁共振检查',\n",
       " '腹部静脉部分切除伴吻合术',\n",
       " '腹部静脉结扎术',\n",
       " '腹部淋巴管造影',\n",
       " '腹部平片',\n",
       " '腹部全厚皮片移植术',\n",
       " '腹部同位素扫描',\n",
       " '腹部血肿去除术',\n",
       " '附带阑尾切除术',\n",
       " '副耳切除术',\n",
       " '复发性肩关节脱位修补术',\n",
       " '附睾病损切除术',\n",
       " '附睾裂伤缝合术',\n",
       " '附睾囊肿切除术',\n",
       " '附睾切除术',\n",
       " '附睾切开探查术',\n",
       " '附睾输精管吻合术',\n",
       " '跗骨病损切除术',\n",
       " '跗骨成形术',\n",
       " '跗骨钢板内固定术',\n",
       " '跗骨钢针内固定术',\n",
       " '腹股沟病损切除术',\n",
       " '腹股沟淋巴结根治性切除术',\n",
       " '腹股沟淋巴结切除术',\n",
       " '腹股沟脓肿切开引流术',\n",
       " '腹股沟探查术',\n",
       " '跗骨骨折闭合复位钢针内固定术',\n",
       " '跗骨骨折闭合复位螺钉内固定术',\n",
       " '跗骨骨折闭合复位术',\n",
       " '跗骨骨折切开复位钢针内固定术',\n",
       " '跗骨骨折切开复位螺钉内固定术',\n",
       " '跗骨骨折切开复位术',\n",
       " '跗骨间融合术',\n",
       " '跗骨开放性骨折清创术',\n",
       " '跗骨螺钉内固定术',\n",
       " '跗骨内固定物取出术',\n",
       " '跗骨切除术',\n",
       " '跗骨死骨去除术',\n",
       " '跗骨髓内针内固定术',\n",
       " '跗骨外固定架去除术',\n",
       " '跗骨外固定术',\n",
       " '跗骨楔形截骨术',\n",
       " '跗骨延长术',\n",
       " '跗骨折骨术',\n",
       " '跗管松解术',\n",
       " '腹会阴拖出术',\n",
       " '妇科手法检查',\n",
       " '腹膜病损切除术',\n",
       " '腹膜缝合术',\n",
       " '腹膜后病损切除术',\n",
       " '腹膜后充气造影',\n",
       " '腹膜后活组织检查',\n",
       " '腹膜后淋巴管横断结扎术',\n",
       " '腹膜后淋巴管瘤囊肿切除术',\n",
       " '腹膜后淋巴结根治性切除术',\n",
       " '腹膜后瘘管造影',\n",
       " '腹膜后脓肿切开引流术',\n",
       " '腹膜后清扫术',\n",
       " '腹膜后疝修补术',\n",
       " '腹膜后引流管去除',\n",
       " '腹膜活组织检查',\n",
       " '腹膜切开术',\n",
       " '腹膜透析',\n",
       " '腹膜透析管去除',\n",
       " '腹膜透析置管术',\n",
       " '腹膜外病损切除术',\n",
       " '腹膜外脓肿切开引流术',\n",
       " '腹膜外剖宫产术',\n",
       " '腹膜外血肿清除术',\n",
       " '腹膜下血肿切除术',\n",
       " '腹膜血管结扎术',\n",
       " '腹膜血肿清除术',\n",
       " '腹膜引流管去除',\n",
       " '腹膜粘连松解术',\n",
       " '腹膜组织修补术',\n",
       " '腹内病损穿刺活组织检查',\n",
       " '腹内静脉造影',\n",
       " '副脾切除术',\n",
       " '腹腔病损穿刺活组织检查',\n",
       " '腹腔病损氩氦刀靶向冷冻治疗术',\n",
       " '腹腔冲洗检查',\n",
       " '腹腔穿刺术',\n",
       " '腹腔动脉造影',\n",
       " '腹腔化疗泵去除',\n",
       " '腹腔镜辅助人工阴道切除术',\n",
       " '腹腔镜检查',\n",
       " '腹腔颈静脉分流术',\n",
       " '腹腔静脉分流术',\n",
       " '腹腔静脉转流泵管置入术',\n",
       " '腹腔镜下残角子宫切除术',\n",
       " '腹腔镜下残留卵巢切除术',\n",
       " '腹腔镜下残留输卵管卵巢切除术',\n",
       " '腹腔镜下肠系膜病损切除术',\n",
       " '腹腔镜下肠粘连松解术',\n",
       " '腹腔镜下单侧腹股沟疝修补术',\n",
       " '腹腔镜下单侧腹股沟斜疝修补术',\n",
       " '腹腔镜下单侧卵巢切除术',\n",
       " '腹腔镜下单侧肾切除术',\n",
       " '腹腔镜下单侧肾上腺切除术',\n",
       " '腹腔镜下单侧输卵管结扎术',\n",
       " '腹腔镜下单侧输卵管卵巢切除术',\n",
       " '腹腔镜下单侧输卵管破坏术',\n",
       " '腹腔镜下单侧输卵管切除术',\n",
       " '腹腔镜下单侧隐睾切除术',\n",
       " '腹腔镜下胆道结石去除术',\n",
       " '腹腔镜下胆道探查术',\n",
       " '腹腔镜下胆道造影术',\n",
       " '腹腔镜下胆管病损切除术',\n",
       " '腹腔镜下胆囊部分切除术',\n",
       " '腹腔镜下胆囊切除术',\n",
       " '腹腔镜下胆囊取石术',\n",
       " '腹腔镜下胆囊造口术',\n",
       " '腹腔镜下胆总管取石术',\n",
       " '腹腔镜下胆总管探查术',\n",
       " '腹腔镜下骶韧带部分切除术',\n",
       " '腹腔镜下骶韧带切断术',\n",
       " '腹腔镜下腹壁病损切除术',\n",
       " '腹腔镜下腹壁活组织检查',\n",
       " '腹腔镜下腹壁疝修补术',\n",
       " '腹腔镜下附带阑尾切除术',\n",
       " '腹腔镜下腹膜病损切除术',\n",
       " '腹腔镜下腹膜代阴道术',\n",
       " '腹腔镜下腹膜后病损切除术',\n",
       " '腹腔镜下腹膜活组织检查',\n",
       " '腹腔镜下腹膜粘连松解术',\n",
       " '腹腔镜下腹腔病损切除术',\n",
       " '腹腔镜下腹腔积血清除术',\n",
       " '腹腔镜下腹腔局部注射',\n",
       " '腹腔镜下腹腔异位妊娠去除术',\n",
       " '腹腔镜下腹腔粘连松解术',\n",
       " '腹腔镜下肝病损切除术',\n",
       " '腹腔镜下肝病损射频消融术',\n",
       " '腹腔镜下肝部分切除术',\n",
       " '腹腔镜下肝活组织检查',\n",
       " '腹腔镜下肝门肠吻合术',\n",
       " '腹腔镜下肝门空肠吻合术',\n",
       " '腹腔镜下肝囊肿抽吸术',\n",
       " '腹腔镜下肝囊肿开窗引流术',\n",
       " '腹腔镜下肝内无水酒精注射术',\n",
       " '腹腔镜下肛门成形术',\n",
       " '腹腔镜下膈疝修补术',\n",
       " '腹腔镜下回肠代阴道术',\n",
       " '腹腔镜下结肠病损切除术',\n",
       " '腹腔镜下结肠部分切除术',\n",
       " '腹腔镜下结肠造口术',\n",
       " '腹腔镜下结肠止血术',\n",
       " '腹腔镜下筋膜外子宫切除术',\n",
       " '腹腔镜下精索静脉高位结扎术',\n",
       " '腹腔镜下巨结肠根治术',\n",
       " '腹腔镜下可调节胃束带放松术',\n",
       " '腹腔镜下可调节胃束带紧缩术',\n",
       " '腹腔镜下可调节胃束带去除术',\n",
       " '腹腔镜下可调节胃束带修正术',\n",
       " '腹腔镜下可调节胃束带置换术',\n",
       " '腹腔镜下空肠造口术',\n",
       " '腹腔镜下阑尾切除术',\n",
       " '腹腔镜下淋巴结活组织检查',\n",
       " '腹腔镜下卵巢病损切除术',\n",
       " '腹腔镜下卵巢病损烧灼术',\n",
       " '腹腔镜下卵巢部分切除术',\n",
       " '腹腔镜下卵巢抽吸术',\n",
       " '腹腔镜下卵巢打孔术',\n",
       " '腹腔镜下卵巢单纯缝合术',\n",
       " '腹腔镜下卵巢电凝术',\n",
       " '腹腔镜下卵巢动静脉高位结扎术',\n",
       " '腹腔镜下卵巢黄体破裂修补术',\n",
       " '腹腔镜下卵巢黄体破裂止血术',\n",
       " '腹腔镜下卵巢黄体切除术',\n",
       " '腹腔镜下卵巢活组织检查',\n",
       " '腹腔镜下卵巢囊肿穿刺术',\n",
       " '腹腔镜下卵巢囊肿造袋术',\n",
       " '腹腔镜下卵巢脓肿切开引流术',\n",
       " '腹腔镜下卵巢切开胚胎清除术',\n",
       " '腹腔镜下卵巢切开探查术',\n",
       " '腹腔镜下卵巢楔形切除术',\n",
       " '腹腔镜下卵巢修补术',\n",
       " '腹腔镜下卵巢悬吊术',\n",
       " '腹腔镜下卵巢再植入',\n",
       " '腹腔镜下卵巢造口术',\n",
       " '腹腔镜下盲肠部分切除术',\n",
       " '腹腔镜下尿道瘘修补术',\n",
       " '腹腔镜下膀胱部分切除术',\n",
       " '腹腔镜下膀胱根治性切除术',\n",
       " '腹腔镜下膀胱全部切除术',\n",
       " '腹腔镜下泡状附件电灼术',\n",
       " '腹腔镜下泡状附件切除术',\n",
       " '腹腔镜下盆腔病损切除术',\n",
       " ...]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[df['category'] == '手术']['term'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4572110\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "with open(\"/data/leo/Projects/UER-py/corpora/R_512_bert.txt\",'r') as f:\n",
    "    for line in f.readlines():\n",
    "        line = line.strip()\n",
    "        if line:\n",
    "            count += 1\n",
    "print(count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = ['B', 'I', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'O']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'B I I I O O O O O O O O O B I I I I I O'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "b8f714736076dee7cc4e7cd002fa6517bd2bcb67ffacfab068ce43d1637242d6"
  },
  "kernelspec": {
   "display_name": "Python 3.9.1 64-bit ('base': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
